Date: 10/03/2020
Creator: Iza Romanowska
Context: Pottery data analysis from the site of Jerash, Jordan. Excavations conducted by the joint Danish-German team under the leadership of Prof. Rubina Raja and Prof. Achim Lichtenberger.
Type of data: Pottery counts + associated parameters
Chronology: Roman to Late Antiquity
Version: 3.0, all data included.
Files necessary to run the analysis: site data: 'data.csv', list of topsoil and disturbed contexts: 'topsoil.xlsx', list of dated contexts: 'chronology.xlsx', list of contexts with Middle Islamic remains: 'MI_layers.csv'
This Jupyter notebook constitutes the Supplementary Information C, of the following publication: ...
Toggle the button above to hide/display the code.
# notebook setup
# Inline plotting plus the data/plotting stack used throughout the notebook.
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
plt.style.use('ggplot')  # ggplot look for all matplotlib figures
from IPython.display import display
from IPython.display import Image
#sns.set_palette('Spectral')
# Global seaborn theme; later sections re-call sns.set() to change font_scale.
sns.set(palette = 'Spectral', font_scale = 1)
Remove all columns that are entirely empty and print the summary of the data types.
# Load the raw pottery records (semicolon-separated export) and drop every
# column that contains no values at all.
df = pd.read_csv('data.csv', sep=';').dropna(axis=1, how='all')
### Sanity checks, run on all new data files
# Quick look at the first rows plus column dtypes / non-null counts.
df.head()
df.info()
# Ad-hoc checks kept for future data deliveries; uncomment as needed.
#df[df.WareTypeID.isnull() == 1]
#df[df.Local.isnull() == 1]
#df.columns
#print(len(df))
#df.Trench.unique()
#df[df['EvidenceID'] == 1]
#print(len(df[df.Evidence =='0']))
#df.Evidence.unique())
# Headline totals used to sanity-check all later filtering steps.
print('Total number of artefacts: ', df.AmountSum.sum())
print('Number of retained artefacts: ', df.AmountKept.sum())
print('Number of trenches: ', len(df.Trench.unique()))
print('Number of unique Evidences: ', df.groupby('Trench')['Evidence'].unique().apply(len).sum())
#print(df.head())
#print(df.columns)
#print(df.PartOfObject.unique())
# Topsoil / disturbed-context lookup: one row per trench, listing the
# Evidence numbers to be excluded as topsoil.
topsoil = pd.read_excel('topsoil.xlsx', index_col = 'Trench')
# tidy up to get rid of funny values (manual corrections for malformed cells)
topsoil['Top soil'].loc['B'] = '1, 2, 54' # removed extra words
topsoil['Top soil'].loc['R'] = '1, 2' # moved 1 to here
#topsoil['Top soil'].loc['M'] = '0' # replaced nan with
# Split the comma-separated evidence numbers into lists of strings.
topsoil['Top soil'] = topsoil['Top soil'].astype('str').str.split(', ')
topsoil = topsoil['Top soil']
topsoil = topsoil.to_dict()  # {trench -> [evidence numbers considered topsoil]}
#not all trenches are in the list of topsoils, so we'll just add them manually
for i in (set(df.Trench.unique()) - set(topsoil.keys())):
    topsoil[i] = [""]
#print(topsoil)
# Remove surface finds, topsoil/disturbed contexts and mixed layers.
print ('number of rows original file: ', len(df))
# Evidence '0' marks finds without a stratigraphic context.
df_surf = df[~df.Evidence.isin(['0'])]
print ('number of rows without evidence 0: ',len(df_surf))
# For every trench drop the rows whose Evidence number is listed as topsoil.
# Improvement: frames are collected in a list and concatenated ONCE —
# the original called pd.concat inside the loop, which is quadratic.
# Seed with an empty, correctly typed frame so dtypes are preserved.
kept_frames = [pd.DataFrame(columns = df.columns).astype(df.dtypes.to_dict())]
for key, values in topsoil.items():
    trench_rows = df_surf[df_surf['Trench'] == key]  # rows of one trench
    kept_frames.append(trench_rows[~trench_rows['Evidence'].isin(values)])  # drop topsoil
# it follows the list of topsoils so we order the index back
df_nots = pd.concat(kept_frames).sort_index()
print('number of rows after all topsoil layers removed: ',len(df_nots))
# check for the trench names - in case some trenches were missing from the topsoil doc
#print(set(df.Trench.unique())- set(df_nots.Trench.unique()))
# Remove rows where 'Evidence' is 'clean' or 'mix'
mix = ['Clean', 'mix', 'Mix', 'Surfa']
df_clean = df_nots[~df_nots['Evidence'].isin(mix)]
print('number of rows after "Clean" and "mix" layers removed: ',len(df_clean))
print('The difference: ',len(df)-len(df_clean))
print('Amount of pottery. Total for the site: ', df.AmountSum.sum())
print('Amount of pottery. Topsoil removed: ',df_clean.AmountSum.sum())
print('The difference: ', df.AmountSum.sum()-df_clean.AmountSum.sum())
"""
#### Visual check on the data after the topsoil has been removed, remove quotes to run
df_check = df.groupby('Trench')['Evidence'].unique().reset_index()
df_check['Length_original'] = df_check['Evidence'].str.len()
df_check1 = df_clean.groupby('Trench')['Evidence'].unique().reset_index()
df_check1['Length_cleaned'] = df_check1['Evidence'].str.len()
df_check2 = pd.merge(df_check, df_check1, left_on = 'Trench', right_on ='Trench', how = 'outer')
#df_check2.info()
df_check2.Evidence_y.replace(np.NaN, '[]', inplace=True)
df_check2['differences'] = [list(set(a) - (set(b))) for a, b in zip(df_check2.Evidence_x, df_check2.Evidence_y)]
df_check2['original_topsoil'] = df_check2['Trench'].map(topsoil)
df_check2
"""
# A typo in a high count has big repercussions for the analysis; the top-10
# check below can be uncommented to inspect the largest values.
#df[['Trench', 'Evidence', 'Find', 'AmountSum']].sort_values(by = 'AmountSum').tail(10)
# Share of artefacts retained vs discarded, per trench.
df_kept = df_clean.groupby('Trench')[['AmountKept', 'AmountDumped', 'AmountSum']].sum()
for what in ('Kept', 'Dumped'):
    df_kept['Prc' + what] = df_kept['Amount' + what] / df_kept['AmountSum']
# Stacked bars: the two fractions add up to 1 for every trench.
df_kept[['PrcKept', 'PrcDumped']].plot(kind = 'bar', stacked = 'true', figsize = (15, 10))
This is figure 3 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 3: total number of sherds per trench, bars in alphabetical order.
sns.set(palette = 'Spectral', font_scale = 1.3)
figure = plt.figure(figsize=(15,10))
ax = sns.barplot(y = 'Trench',
                 x = 'AmountSum',
                 data = df_clean,
                 estimator= sum,   # bar length = sum of AmountSum, not the mean
                 ci = None,        # no bootstrap error bars
                 color = 'steelblue',
                 order = sorted(df_clean.Trench.unique().tolist())) # order alphabetically
ax.set(xlabel='Number of pottery sherds', ylabel = 'Trench', title = 'Amount of archaeological material per trench')
plt.savefig('figures/3.png', dpi = 600)
df_clean.groupby(['Trench'], as_index = False)['AmountSum'].sum()#.T # .T to see the table horizontally
Most contexts produced only a few artefacts (1-2) and the vast majority under 20, but there are some contexts with thousands of pottery sherds.
# Summary statistics and a histogram of artefact counts per record (log x-axis).
display(df_clean[['AmountKept', 'AmountDumped', 'AmountSum']].describe())
fig, ax =plt.subplots( figsize = (10,6))
# Arch material on a log scale
# NOTE(review): sns.distplot is deprecated in modern seaborn (removed in
# newer releases); histplot/displot is the replacement — confirm the
# seaborn version before re-running.
sns.distplot(df['AmountSum'].dropna(), bins = 10000)
ax.set_title('Amount Sum on a log scale')
ax.set_xscale("log")
# Tidy up the layout
plt.tight_layout()
This shows trenches with higher number of 'large' evidences.
# Evidences (contexts) per trench vs amount of material:
# bars = context counts (left axis), line = artefact totals (right axis).
tr1 = df_clean.groupby('Trench').size().reset_index(name ='No_of_Evidences')
tr2 = df_clean.groupby(['Trench'], as_index = False)['AmountSum'].sum()
trs = tr1.merge(tr2, right_on = 'Trench', left_on ='Trench')
#display(trs)
fig, ax1 = plt.subplots(figsize = (15,10))
sns.barplot(y = trs.No_of_Evidences, x = 'Trench', data=trs, ax = ax1, alpha = 0.5)
ax1.set_ylabel('Number of evidences per trench')
ax2 = ax1.twinx()  # second y-axis sharing the same x
trs.plot(y = 'AmountSum', x = 'Trench', color = 'slateblue', ax = ax2)
ax2.set_ylabel('Amount of material')
ax2.set_ylim(0, None)
ax2.grid(False)  # keep only the left axis' grid
Observation: The amount of material coming from different trenches varies significantly but the number of evidences and the amount of material are roughly correlated. All analysis should be done on absolute counts and on frequencies calculated for each trench. The following trenches have some Evidences with high amount of material in respect to the number of Evidences: J14, X and N.
This is figure 4 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 4: per-trench distribution of artefacts per Evidence (boxplots on a
# log scale) with the per-trench mean overlaid as large dots.
sns.set(palette = 'Spectral', font_scale = 1.3)
# Fixed: was `ax, fig = plt.subplots(...)` — plt.subplots returns
# (Figure, Axes), so the names were swapped. Harmless here because both
# were immediately reassigned/unused, but misleading to readers.
fig, ax = plt.subplots(figsize=(15,15))
order = sorted(df_clean.Trench.unique().tolist())
ax = sns.boxplot(x = "Trench",
                 y = "AmountSum",
                 data = df_clean,
                 color = 'steelblue',
                 order = order)
# groupby sorts its keys, so the means line up with the alphabetical order.
ax = sns.scatterplot(x = order,
                     y=df_clean.groupby("Trench")["AmountSum"].mean(),
                     s = 200, label = 'mean value')
ax.set_yscale("log")
ax.set_ylabel("Number of artefacts per Evidence")
# NOTE(review): ymin=0 is not representable on a log axis; matplotlib will
# clip it to the data range — confirm this is the intended look.
ax.set_ylim(0, None)
plt.savefig('figures/4.png', dpi = 600)
print("Maxium number of artefacts per Evidence: ", df_clean.AmountSum.max())
print('And the Evidence with max artefacts is: ')
display(df_clean.loc[df_clean['AmountSum'] == df_clean.AmountSum.max()])
print("Minimum number of artefacts per Evidence: ", df_clean.AmountSum.min())
print("Average number of artefacts per Evidence: ", df_clean.AmountSum.mean())
print("Median number of artefacts per Evidence: ", df_clean.AmountSum.median())
# Narrow the cleaned data down to pottery in two steps: first the
# 'Ceramics' group, then the 'Pottery' subgroup within it.
print('Total number of data entities', len(df_clean))
df_pot = df_clean.loc[df_clean['Group'] == 'Ceramics']
print('Total number of all ceramics: ', len(df_pot))
df_pot = df_pot.loc[df_pot['SG1'] == 'Pottery']
print('Total number of pottery only: ', len(df_pot))
df_pot.head()
We add all SGs into one number to provide a unique identifier for each pottery type. We can use different thresholding levels (e.g., only SG1-3) or classify each type separately.
# Build one numeric identifier per pottery type by concatenating the
# SG1ID-SG5ID digits (missing levels become 0), plus a readable label from
# the SG1-SG5 names.
# Fixed: the SG columns were previously read from the full `df`; that only
# worked because df_pot keeps df's row index and pandas aligns on it. Read
# them from df_pot itself so the result no longer depends on index alignment.
# NOTE(review): df_pot is a filtered slice of df_clean, so this assignment
# may still emit SettingWithCopyWarning — behaviour is unchanged.
df_pot['Type'] = (df_pot[['SG1ID', 'SG2ID','SG3ID', 'SG4ID', 'SG5ID']]
                  .fillna(0)
                  .astype('int')
                  .astype('str')
                  .apply(lambda x: ''.join(x), axis=1))
# Use the following line to check whether correct
#df_pot[['SG1ID', 'SG2ID','SG3ID', 'SG4ID', 'SG5ID', 'Type']].head(20)
df_pot['Type_verbose'] = (df_pot[['SG1', 'SG2','SG3', 'SG4', 'SG5']]
                          .fillna('')
                          .astype('str')
                          .apply(lambda x: ', '.join(x), axis=1))
This is what it looks like. E.g., Type 25500 is SG1: 2 ('Pottery'), SG2: 5 ('Table Ware'), SG3: 5 ('Plain Ware'), SG4: 0 (no value), SG5: 0 (no value).
# Keep the pottery columns
col_names = ['Trench', 'Evidence', 'Find','AmountSum', 'WareType', 'Local', 'Type', 'Type_verbose']
df_pot = df_pot[col_names]
df_pot.head()
# Collapse duplicates: one row per (Trench, Evidence, Local, WareType, Type),
# summing the amounts; descriptive columns keep their first value.
df_pot = df_pot.groupby(['Trench','Evidence','Local', 'WareType', 'Type']).agg(
    {'AmountSum':'sum',
     'Find':'first',
#     'WareType':'first',
#     'Local':'first',
#     'Type':'first',
     'Type_verbose':'first'}).reset_index()
# Row count drops but the total artefact count must be unchanged.
print('Length original: ', len(df_clean), 'Length now: ', len(df_pot))
print('Sum AmountSum original: ',df_clean[df_clean['SG1'] == 'Pottery']['AmountSum'].sum(),
      'Sum AmountSum now: ', df_pot.AmountSum.sum())
# check that although the length of the df changed the total amount of pottery was maintained
df_pot.head()
# read file
# Combine the per-trench sheets of chronology.xlsx into one frame (df_new).
xl = pd.ExcelFile('chronology.xlsx')
df_new = pd.DataFrame()
names = xl.sheet_names
count = 0  # running total of rows read, for manual checking
# collect data from all the sheets and combine together
for i in names:
    # Default trench code: last character of the sheet name.
    j = i[-1]
    if i == "2015 - Trench J": #trench J and trench Q are divided, we maintain that division
        j = "J15"
    if i == "2014 - Trench J":
        j = "J14"
    if i == "2015 - Trench Qg":
        j = "Qg"
    if i == "2015 - Trench Qh":
        j = "Qh"
    # Only read sheets whose trench occurs in the pottery data.
    # NOTE(review): `y in j` is a substring test; if two trench codes both
    # matched one sheet code the sheet would be appended twice — confirm
    # the trench naming rules out that case.
    for y in df_pot.Trench.unique():
        if y in j:
            # print(y, j)
            # NOTE(review): `convert_float` is deprecated/removed in recent
            # pandas — confirm the pandas version before re-running.
            df_temp = pd.read_excel('chronology.xlsx',
                                    sheet_name = i,
                                    skiprows=2,
                                    header = 0,
                                    convert_float = True,
                                    na_values = '-')
            count += len(df_temp)
            df_temp.rename(columns={'Evidence no. ':'Evidence'},inplace=True)
            # here we autofill because there were many gaps
            df_temp.Evidence.fillna(method='ffill', inplace = True)
            df_temp['Evidence'] = df_temp['Evidence'].astype(int).astype(str)
            df_temp['Trench'] = j
            df_new = pd.concat([df_new, df_temp], ignore_index = True)
df_new.head()
# Count of dated cells per (Trench, Evidence), summed per dating column.
df_new.drop(columns =['Comments']).groupby(['Trench', 'Evidence']).count().sum()#.sum()
# use this line if you want to have dating for each category summed up separately
# eg. if AmountSum 10 and dating REB then R gets extra 10 and EB gets extra 10
# you obviously end up with way more pottery than there actually is
#df_new.update(df_new[['Pottery','14C','Glass','Coin']].fillna(''))
# use this line if you want to have dating as it is, e.g., REB will stay REB.
# One row per (Trench, Evidence): dating strings of duplicate rows are
# concatenated; the first comment is kept.
df_new = df_new.fillna('').groupby(['Trench','Evidence']).agg(
    {'Pottery': lambda col: ''.join (col) ,
     '14C': lambda col: ''.join(col),
     'Glass': lambda col: ''.join(col),
     'Coin': lambda col: ''.join(col),
     'Comments': 'first'}).reset_index()
# Attach the chronology to the pottery data (left join keeps undated rows).
df_f = pd.merge(df_pot, df_new, on = ['Trench', 'Evidence'], how = 'left')
# a bit of housekeeping, order columns, replace empty cells with nans
columns = ['Trench', 'Evidence','AmountSum', 'Type', 'Type_verbose', 'Local','WareType','14C','Coin', 'Glass', 'Pottery', 'Comments']
df_f= df_f[columns]
from collections import OrderedDict
# De-duplicate repeated characters in each dating string while keeping their
# order of first appearance (e.g. 'RRB' -> 'RB').
for i in columns[7:-1]:
    df_f[i] = df_f[i].fillna('').map(lambda x: ''.join(OrderedDict.fromkeys(x).keys()), na_action=None)
df_f = df_f.replace(['', '?'], np.nan)
# Normalise one out-of-order dating label.
df_f = df_f.replace('BEIR', 'RBEI')
#Check that the totals are correct (if you use the first method they' obviously wont)
print('Checks for coherence, all values should be the same unless the first method was used')
print('Sum AmountSum original: ',df_clean[df_clean['SG1'] == 'Pottery']['AmountSum'].sum(), 'Sum AmountSum pot: ', df_pot.AmountSum.sum())
print('Sum AmountSum original: ',df_clean[df_clean['SG1'] == 'Pottery']['AmountSum'].sum(), 'Sum AmountSum pot with chrono: ', df_f.AmountSum.sum())
If same values then all dimensions of data have been preserved.
Dating convention: R - Roman, B - Byzantine, EI - Early Islamic
Plus combinations when one context was dated to multiple periods:
This chronology comes from two sources: A list of Middle Islamic contexts and a list of Evidences where Middle Islamic pottery has been found mixed with other types
# first contexts where MI pottery is present
# Flag every (Trench, Evidence) that contains Middle Islamic ware types.
# Fixed: .copy() added — assigning a column to a filtered slice triggered
# pandas' SettingWithCopyWarning and is not guaranteed to take effect;
# the notebook's behaviour is otherwise unchanged.
MI_contexts = df_f[df_f['WareType'].isin(["Unpainted Mamluk", "HMGPW"])].copy()
# we do it this way to ensure that the MI dating is on all instances of that archaeological context
MI_contexts["MI_pot"] = 'EIMI'
MI_contexts = MI_contexts[['Trench', 'Evidence', 'MI_pot']]
# NOTE(review): if one context holds several MI ware rows this merge is
# many-to-many and would duplicate df_f rows — confirm MI_contexts keys
# are unique per (Trench, Evidence) in the data.
df_f = pd.merge(df_f, MI_contexts, how ='left', on = ['Trench', 'Evidence'])
# data with MI contexts
MI = pd.read_csv('MI_layers.csv', dtype = object)
df_MI = pd.merge(df_f, MI, how = 'left', on = ['Trench', 'Evidence'])
df_MI["MI"].fillna(df_MI["MI_pot"], inplace=True) # MI contexts stay unchanged, mixed contexts are added
df_f = df_MI.drop('MI_pot', axis = 1)
# Colour palettes keyed by chronology label so each period keeps the same
# colour across all figures.
order = [ 'R', 'RB','B', 'BEI', 'EI', 'REI', 'RBEI']
pal_chrono = sns.color_palette('Spectral', 12)[2:3] + sns.color_palette('Spectral', 12)[4:7]+sns.color_palette('Spectral', 12)[8:9]+sns.color_palette('RdBu', 11)[-2:]
print(order)
sns.palplot(pal_chrono)
chrono_pal_dict = dict(zip(order, pal_chrono))
# Extended label set including the Middle Islamic combinations.
all_order = ['R', 'RB', 'B', 'BEI', 'EI', 'EIMI', 'MI', 'RBEI','BEIMI', 'RBEIMI']
#sns.palplot(sns.color_palette('Spectral', 12))
pal_chrono_all = sns.color_palette('Spectral', 12)[2:3] + sns.color_palette('Spectral', 12)[4:7]+sns.color_palette('Spectral', 12)[8:11]+sns.color_palette('RdBu', 11)[-1:] * 3
print ( ' R', ' RB', ' B', ' BEI', ' EI', ' EIMI', ' MI', ' RBEI',' BEIMI', 'RBEIMI',)
sns.palplot(pal_chrono_all)
all_chrono_pal_dict = dict(zip(all_order, pal_chrono_all))
Please note that chronology that was not classified (e.g., exact dates) was removed.
X-axis: chronology. Y-axis: number of artefacts.
# 2x2 grid: artefact counts per chronology, one panel per dating method.
plt.figure(figsize = (20,20))
order_noMI = [ 'R', 'RB', 'B', 'BEI', 'EI', 'RBEI']
pl = 1  # subplot counter
dating_types = ['Pottery','14C', 'Coin', 'Glass']
for i in dating_types:
    plt.subplot(2,2, pl)
    ax = sns.barplot(x = i, y = 'AmountSum', estimator = sum, data = df_f, ci = None, order = order_noMI, palette = all_chrono_pal_dict)
    ax.set(xlabel='', ylabel = '')
    plt.xticks(rotation = 90)
    plt.title(i, fontsize=20)
    plt.tight_layout()
    pl+=1
#plt.savefig('figures/chronology.png', dpi = 300)
X-axis: chronology. Y-axis: number of artefacts
# Same 2x2 grid restricted to the five narrow chronologies (no RBEI).
sns.set(font_scale = 1.9)
plt.figure(figsize = (20,20))
pl = 1
order_small = [ 'R', 'RB', 'B', 'BEI', 'EI']
dating_types = ['Pottery','14C', 'Coin', 'Glass']
for i in dating_types:
    plt.subplot(2,2, pl)
    ax = sns.barplot(x = i, y = 'AmountSum', estimator = sum, data = df_f, ci = None, order = order_small, palette = chrono_pal_dict)
    ax.set(xlabel='', ylabel = '')
    plt.xticks(rotation = 90)
    plt.title(i)
    plt.tight_layout()
    pl+=1
#plt.savefig('figures/CiC_chronology.png', dpi = 600)
# This shows the total number of pieces that has been dated using pottery and that do not have a very long chronology - i.e. RBEI
df_dated = df_f[df_f['Pottery'].isin(order_small)]
df_dated.groupby('Pottery')['AmountSum'].sum()#.sum()
This is figure 6 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 6: stacked areas of artefact counts dated by each method per period.
sns.set(palette = 'Paired', font_scale = 1.3)
# Add up the number of artefacts dated to each period using each method
dating = []
for i in dating_types:
    df_dated = df_f[df_f[i].isin(order_small)]
    # print(df_dated.groupby(i)['AmountSum'].sum()) #check the values
    # NOTE(review): rows are filtered with order_small (5 labels) but the
    # series is reindexed with `order` (7 labels incl. 'REI' and 'RBEI'),
    # so the last positions are NaN — confirm this is intended.
    dating.append(df_dated.groupby(i)['AmountSum'].sum().reindex(order).values)
# plot using stackplot
plt.figure(figsize=(15,8))
plt.stackplot(order,dating, labels=dating_types, alpha=0.9 )
plt.legend(loc='upper right')
plt.xlabel('Chronology')
plt.ylabel('Number of artefacts')
plt.title('Number of artefacts dated with each method')
plt.savefig('figures/6.png', dpi = 600)
plt.show()
If different categories bring different chronological markers we merge them into a mixed context (e.g., RBEI).
def sort_crono(string):
    """Collapse a concatenated dating string into canonical R-B-EI-MI order.

    Every period code that occurs anywhere in *string* (substring test)
    appears exactly once in the result, in fixed chronological order.
    """
    return ''.join(code for code in ('R', 'B', 'EI', 'MI') if code in string)
# Merge the five dating columns into one combined label per row.
all_dating_types = ['Pottery','14C', 'Coin', 'Glass', 'MI']
# The set-join makes the concatenation order arbitrary; sort_crono restores
# the canonical R-B-EI-MI order afterwards.
df_f['combined'] = df_f[all_dating_types].fillna('').apply(lambda row: ''.join(set(row.values.astype(str))), axis=1).apply(sort_crono)
# Some of the wider chronology need re-labeling.
# E.g., If a context has both R and MI evidence then it should just be grouped as the widest chronology RBEIMI
chrono_dict = {"RBMI":"RBEIMI", "REI":"RBEI", "REIMI": "RBEIMI", "":"no dating"}
df_f['combined'] = df_f['combined'].replace(chrono_dict)
df_f.groupby('combined')['AmountSum'].sum()
# this affects only about 4000 artefacts
df_f['combined'].unique()
# All dating methods combined: artefact totals per chronology label.
plt.figure(figsize=(15, 10))
combined_ax = sns.barplot(
    x='combined',
    y='AmountSum',
    data=df_f,
    estimator=sum,
    ci=None,
    order=all_order,
    palette=all_chrono_pal_dict,
)
combined_ax.set_xlabel('Chronology')
combined_ax.set_ylabel('Number of Artefacts')
combined_ax.set_title('Dating of Artefacts using all Methods')
X-axis: number of artefacts; Y-axis: pottery provenance.
# Provenance totals (Local / Import / Regional) with count labels, plus a
# summary table of absolute numbers and percentages.
sns.set(palette = 'Spectral', font_scale = 1.5)
plt.figure(figsize = (15,10))
ax=sns.barplot(y = 'Local',
               x = 'AmountSum',
               data = df_f,
               estimator= sum,
               order = ['Local', 'Import', 'Regional'],
               ci = None)
ax.set(xlabel='', ylabel = '')
# Totals per provenance in alphabetical order: Import, Local, Regional.
a,b,c = [str(x) for x in df_pot.groupby(['Local'])['AmountSum'].sum().sort_index()]
# correct the order of labels
# NOTE(review): this swap rearranges the three totals to match the bar
# order ('Local', 'Import', 'Regional'); fragile if categories change.
a,b,c = b,a,c
place = 0.035  # y position (data coords) of the first text label
for j in [a,b,c]:
    plt.text(10000,place, j, fontsize = 14, color='black')
    place +=1
plt.tight_layout()
#plt.savefig('Local_total_large.png', dpi = 300)
totals = df_f.groupby(['Local'])['AmountSum'].sum()
# Percentage of the whole assemblage contributed by each provenance.
pcts = totals.groupby(level=0).apply(lambda x: x / float(sum(df_f.AmountSum))*100).to_frame()
combined = pd.merge (totals.to_frame(), pcts, left_index = True, right_index = True)
combined.rename(columns = {'AmountSum_x':'Amount Sum','AmountSum_y': 'Percentage'}, inplace=True)
display(combined)
X-axis: Chronology; Y-axis: Number of artefacts.
# Provenance split per chronology, one panel per dating method.
sns.set(palette = 'Spectral', font_scale = 1.5)
plt.figure(figsize = (20,20))
pl = 1
order = [ 'R', 'RB', 'B', 'BEI', 'EI']
for i in ['14C', 'Coin', 'Glass', 'Pottery']:
    plt.subplot(2,2, pl)
    ax = sns.barplot(x = i, y = 'AmountSum', estimator = sum, data = df_f, hue = 'Local', ci = None, order = order)
    ax.set(xlabel='', ylabel = '')
    plt.xticks(rotation = 90)
    plt.title(i)
    plt.tight_layout()
    pl+=1
#plt.savefig('chronology.png')
# Companion tables: amounts per provenance and chronology for each method.
for i in dating_types:
    pivot_df = df_f.pivot_table( columns='Local', index = i, values='AmountSum', aggfunc = 'sum')
    #pivot_df = pivot_df.applymap(lambda x: x / x.sum())
    pivot_df['sum'] = pivot_df.sum(axis = 1)
    # NOTE(review): the result of this apply is discarded (neither assigned
    # nor displayed) — the percentage table is computed but never shown;
    # confirm whether it should be displayed or removed.
    pivot_df.apply(lambda x: 100 * x / pivot_df['sum'])
    display(pivot_df)
X-axis: Chronology; Y-axis: number of artefacts. Please note that the y axis of the middle plot is on logarithmic scale.
# Fixed RGB triples (hand-picked, presumably from the Spectral palette —
# confirm) so the provenance categories keep identical colours across the
# stacked proportion plots.
pal99 = [(0.99715494040753561, 0.91180315263360245, 0.60107650903498655),(0.88535178777393309, 0.31903114186851211, 0.29042675893886966), (0.98731257208765866, 0.64736639753940783, 0.36424452133794688)]
def pic_maker(data):
    """Draw a three-panel provenance figure for one dating method.

    data : str
        Name of a dating column in the module-level ``df_f`` (e.g.
        'Pottery', '14C', 'Coin', 'Glass' or 'combined').

    Panels: absolute counts per chronology split by provenance, the same on
    a logarithmic scale, and the stacked provenance proportions.
    Saves '<data>_chrono.png' and returns the matplotlib Figure.
    """
    sns.set(palette = 'Spectral', font_scale = 1.5)
    # Fixed colours so provenance categories match across figures.
    pal99 = [(0.99715494040753561, 0.91180315263360245, 0.60107650903498655),(0.88535178777393309, 0.31903114186851211, 0.29042675893886966), (0.98731257208765866, 0.64736639753940783, 0.36424452133794688)]
    order = [ 'R', 'RB', 'B', 'BEI', 'EI', 'RBEI']
    f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20,10))
    # Left panel: absolute numbers. (comment fixed: previously said "Right graph")
    ax1 = sns.barplot(x = data, y = 'AmountSum', hue = 'Local',data = df_f, estimator= sum, ci = None, ax = ax1, order = order)
    ax1.set(xlabel='', ylabel = '')
    ax1.set_title (data + ' dating: absolute numbers')
    ax1.legend().set_title('')
    # Middle panel: the same data on a log scale. (comment fixed: said "Left graph")
    ax2 = sns.barplot(x = data, y = 'AmountSum', hue = 'Local',data = df_f, estimator= sum, ci = None, ax = ax2, order = order)
    ax2.set(xlabel='', ylabel = '')
    ax2.set_yscale('log')
    ax2.set_title ('Logarithmic scale')
    ax2.legend_.remove()
    # Right panel: stacked provenance proportions, each chronology normalised to 1.
    freqs_evid = (df_f.groupby([data,'Local'])["AmountSum"]
                  .sum()      # total amount per chronology and provenance
                  .unstack()  # provenance categories become columns
                  .T          # transpose
                  .apply(lambda x: x / x.sum()) # normalise each chronology to 1
                  .T          # transpose back
                  # .loc[order] # this is not supported any more if any of the labels are missing
                  .plot(kind='bar',
                        stacked=True,
                        ax = ax3,
                        legend = False,
                        color = pal99,
                        ylim = (0,1.0),
                        title = ""))
    ax3.set_title ('Proportion')
    ax3.set(xlabel='', ylabel = '')
    # Removed a dead `plt.xticks(rotation = 360)` call that was immediately
    # overridden by the rotation below.
    plt.xticks(rotation = 90)
    ax3.set_yticklabels([str(x) +'%' for x in range(0,101,20)])
    plt.savefig(data + '_chrono.png')
    return f
This is figure 4 in Romanowska et al. 2021. A study of the centuries-long reliance on local ceramics in Jerash through full quantification and simulation.
# Three-panel provenance figures for each dating method.
# NOTE(review): this `order` assignment is never used — pic_maker defines
# its own local `order`; kept for the record.
order = [ 'R', 'RB', 'B', 'BEI', 'EI']
ax1 = pic_maker('14C')
ax2 = pic_maker('Coin')
ax3 = pic_maker('Glass')
ax4 = pic_maker('Pottery')
#plt.savefig('chronoAll.png')
# And once more for the combined chronology.
pic_maker('combined')
#plt.savefig('provenance.png', dpi = 300)
This is figure 17 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 17: stacked provenance proportions for every combined chronology;
# the shaded band highlights the Middle-Islamic-bearing bar positions.
sns.set(font_scale = 1.2)
fig, ax = plt.subplots(figsize=(15,8))
# Blue band over bar positions 6.5-10 (the MI-related categories).
plt.axvspan(xmin = 6.5, xmax = 10, facecolor='b', alpha=0.3)
(df_f.set_index('combined').groupby(['combined','Local'])["AmountSum"]
 .sum() # sum amounts per chronology and provenance
 .unstack() # provenance categories become columns
 .T # transpose
 .apply(lambda x: x / x.sum()) # normalise each chronology to 1
 .T # transpose back
 .loc[all_order]
 .plot(kind='bar', # plot
       stacked=True,
       ax = ax,
       figsize = (15,8),
       legend = False,
       color = pal99,
       ylim = (0,1.0),
       title = "Proportion between local, regional and imported pottery"))
ax.set_yticklabels([str(x) +'%' for x in range(0,101,20)])
plt.xlabel("")
plt.legend(bbox_to_anchor=(1.0, 1), loc=2)
plt.savefig('figures/17.png', dpi = 600, bbox_inches = "tight")
X-axis: number of artefacts; Y-axis: Chronology.
# remove just plain ware
df_NoPlain = df_f[~df_f.WareType.isin( ['Plain'])]
# Fixed a typo in the printed label ("origial" -> "original").
print('Total number original: ', len(df_f),'AmountSum: ', df_f['AmountSum'].sum(),'Number without Plain Ware: ', len(df_NoPlain), 'AmountSum: ', df_NoPlain['AmountSum'].sum())
# Artefact counts per combined chronology and provenance, plain ware excluded.
sns.set(palette = 'Spectral', font_scale = 1.5)
plt.figure(figsize = (20,10))
ax = sns.barplot(y = 'combined',
                 x = 'AmountSum',
                 hue = 'Local',
                 data = df_NoPlain,
                 estimator= sum,
                 ci = None)
ax.set(xlabel='', ylabel = '')
plt.legend(bbox_to_anchor=(1.0, 1), loc=2)
plt.title('Dating by Pottery without Plain pottery')
X-axis: Trench; Y-axis: number of artefacts.
# Amount of material per trench, split by provenance.
sns.set(palette='Spectral', font_scale=1.5)
plt.figure(figsize=(20, 10))
trench_ax = sns.barplot(
    x='Trench',
    y='AmountSum',
    hue='Local',
    data=df_f,
    estimator=sum,
    ci=None,
)
trench_ax.set(xlabel='', ylabel='')
plt.xticks(rotation=90)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncol=1)
X-axis: Trench; Y-axis: proportion of artefacts.
#sns.set(palette = pal1, font_scale = 1.5)
#sns.set_palette(pal1, 3)
# a bit of wiggle with the colours to make them consistent with previous plots
pal = sns.color_palette('Spectral')[:3]
a, b, c = pal
pal = c, a, b
sns.set( font_scale = 1.5)
sns.set_palette(pal)
# NOTE(review): `b` and `data_trenches1` below are computed but not used in
# this cell (the loop plots df_f / df_NoPlain) — confirm leftover code.
b = list('KPVBJN')
data_trenches1 = df_NoPlain [df_NoPlain['Trench'].isin(b)]
#fig, ax = plt.subplots(1,2)
# Attach display names used as plot titles below; pandas may warn about
# setting a non-column attribute on a DataFrame.
df_f.name = 'Whole Assemblage'
df_NoPlain.name = 'Plain Wares Removed'
#df_NoPlain2.name = 'Three Selected Plain Wares Removed'
# Stacked provenance proportions per trench, with and without plain wares.
for i in ([df_f, df_NoPlain]):
    ax = (i.groupby(['Trench','Local'])["AmountSum"]
          .sum() # sum amounts per trench and provenance
          .unstack() # provenance categories become columns
          .T # transpose
          .apply(lambda x: x / x.sum()) # normalise each trench to 1
          .T # transpose back (there must be a better way...)
          # .loc[b] # order the bars
          .plot(kind='bar', # plot
                stacked=True,
                #colormap = pal1,
                figsize = (20,10)))
    ax.set_yticklabels([str(x) +'%' for x in range(0,101,20)])
    plt.legend(bbox_to_anchor=(1.0, 1), loc=2, borderaxespad=0., ncol = 1)
    plt.ylim([0,1])
    plt.title(i.name)
    #plt.savefig('CiC_'+ str(i.name) + '.png', dpi = 600)
# Per-trench provenance table: absolute counts, trench totals and the
# percentage each provenance contributes, shaded with a green gradient.
per_local = df_pot.groupby(['Trench','Local'])['AmountSum'].sum().to_frame()
per_trench = df_pot.groupby(['Trench'])['AmountSum'].sum().to_frame()
trench_prov = (per_local
               .merge(per_trench, left_index=True, right_index=True)
               .reset_index()
               .rename(columns={'AmountSum_x': 'Amount Sum',
                                'AmountSum_y': 'Total'}))
trench_prov['Percentage'] = trench_prov['Amount Sum'] / trench_prov['Total'] * 100
cm = sns.light_palette("green", as_cmap=True)
s = trench_prov.style.background_gradient(cmap=cm)
display(s)
#you can also do it this way:
#pivot_df = df_f.pivot_table( columns='Local', index = 'Trench', values='AmountSum', aggfunc = 'sum')
#pivot_df['sum'] = pivot_df.sum(axis = 1)
#pivot_df.apply(lambda x: 100 * x / pivot_df['sum'])
X-axis: Trench; Y-axis: proportion of artefacts.
This is figure 18 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
df_pot.Trench.unique()
# Figure 18: provenance proportions for the earthquake trenches (K, P, V)
# and the dense-fill trenches (B, J, N), side by side.
#plt.figure(figsize = (10, 20))
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
# plot 1 - the earthquake trenches: K, P and V
b = list('KPV')
data_trenches1 = df_pot[df_pot['Trench'].isin(b)]
ax1 = (data_trenches1.groupby(['Trench','Local'])["AmountSum"]
       .sum() # sum amounts per trench and provenance
       .unstack() # provenance categories become columns
       .T # transpose
       .apply(lambda x: x / x.sum()) # normalise each trench to 1
       .T # transpose back
       .plot(kind='bar', # plot
             stacked=True,
             ax = ax1,
             legend = None,
             title = 'Earthquake trenches'
             ))
ax1.set_yticklabels([str(x) +'%' for x in range(0,101,20)])
ax1.set(xlabel='', ylabel = '')
ax1.set_ylim([0,1])
plt.setp(ax1.xaxis.get_majorticklabels(), rotation=360)
# plot 2 - the dense fill trenches: B, J and N
c = list('BJN')
# We put the J trenches together because they are of a similar type of context (just excavated over 2 years)
combineJ = df_pot.copy()
combineJ.Trench = combineJ.Trench.replace({"J14":"J", "J15":"J"})
data_trenches2 = combineJ[combineJ['Trench'].isin(c)]
ax2 =(data_trenches2.groupby(['Trench','Local'])["AmountSum"]
      .sum() # sum amounts per trench and provenance
      .unstack() # provenance categories become columns
      .T # transpose
      .apply(lambda x: x / x.sum()) # normalise each trench to 1
      .T # transpose back
      .plot(kind='bar', # plot
            stacked=True,
            ax = ax2,
            title = 'Dense ceramic fills'
            ))
ax2.set_yticklabels([str(x) +'%' for x in range(0,101,20)])
ax2.set_ylim([0,1])
ax2.set(xlabel='', ylabel = '')
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=360)
# Reverse the legend entries so they read top-to-bottom like the stacks.
handles, labels = plt.gca().get_legend_handles_labels()
order2 = [2,1,0]
plt.legend([handles[idx] for idx in order2],[labels[idx] for idx in order2], bbox_to_anchor=(1.0, 1), loc=2, borderaxespad=0., ncol = 1)
plt.savefig('figures/18.png')
This is figure 2 in Romanowska et al. 2021. A study of the centuries-long reliance on local ceramics in Jerash through full quantification and simulation.
# Figure 2 (second paper): site-wide provenance proportion plus the two
# trench groups, as one 3-panel figure.
#plt.figure(figsize = (10, 20))
f, (ax0, ax1, ax2) = plt.subplots(1, 3, figsize=(20,10), gridspec_kw = {'width_ratios':[0.6, 2, 2]})
# plot 1 - total proportion on the site
pivot_df = df_f.pivot_table( columns='Local', values='AmountSum', aggfunc = 'sum')
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 (use
# DataFrame.map) — confirm the pandas version before re-running.
pivot_df = pivot_df.applymap(lambda x: x / df_f['AmountSum'].sum())
pivot_df.plot(kind='bar', stacked = True, ax = ax0, legend=False, title = 'Total')
ax0.set(xlabel='', ylabel = '')
ax0.set_yticklabels([str(x) +'%' for x in range(0,101,20)])
ax0.set_ylim([0,1])
ax0.set_xticks([])
# plot 2 - the earthquake trenches: K, P and V
b = list('KPV')
data_trenches1 = df_pot[df_pot['Trench'].isin(b)]
ax1 = (data_trenches1.groupby(['Trench','Local'])["AmountSum"]
       .sum() # sum amounts per trench and provenance
       .unstack() # provenance categories become columns
       .T # transpose
       .apply(lambda x: x / x.sum()) # normalise each trench to 1
       .T # transpose back
       .plot(kind='bar', # plot
             stacked=True,
             ax = ax1,
             legend = None,
             title = 'Earthquake trenches'
             ))
ax1.set_yticklabels([str(x) +'%' for x in range(0,101,20)])
ax1.set(xlabel='', ylabel = '')
ax1.set_ylim([0,1])
plt.setp(ax1.xaxis.get_majorticklabels(), rotation=360)
# plot 3 - the dense fill trenches: B, J and N
# NOTE(review): unlike figure 18 above, J14/J15 are NOT merged into 'J'
# here, so only trenches B and N can match list('BJN') — confirm intended.
c = list('BJN')
data_trenches1 = df_pot[df_pot['Trench'].isin(c)]
ax2 =(data_trenches1.groupby(['Trench','Local'])["AmountSum"]
      .sum() # sum amounts per trench and provenance
      .unstack() # provenance categories become columns
      .T # transpose
      .apply(lambda x: x / x.sum()) # normalise each trench to 1
      .T # transpose back
      .plot(kind='bar', # plot
            stacked=True,
            ax = ax2,
            title = 'Dense ceramic fills'
            ))
ax2.set_yticklabels([str(x) +'%' for x in range(0,101,20)])
ax2.set_ylim([0,1])
ax2.set(xlabel='', ylabel = '')
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=360)
# Reverse the legend entries so they read top-to-bottom like the stacks.
handles, labels = plt.gca().get_legend_handles_labels()
order2 = [2,1,0]
plt.legend([handles[idx] for idx in order2],[labels[idx] for idx in order2], bbox_to_anchor=(1.0, 1), loc=2, borderaxespad=0., ncol = 1)
#plt.savefig('Proportion_trenches_large.png')
# Trench V on its own: provenance totals and their share of the trench assemblage.
data_trenchesV = df_pot.loc[df_pot['Trench'] == "V"]
data_v = (data_trenchesV
          .groupby('Local')["AmountSum"]
          .sum()
          .reset_index())
data_v["freq"] = data_v["AmountSum"] / data_v["AmountSum"].sum()
data_v
X-axis: number of artefacts; Y-axis: Pottery types.
This is figure 8 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 8: ware-type totals for every ware with more than 50 sherds.
sns.set( font_scale = 1.3)
plt.figure(figsize = (15,12))
# Harmonise ware-type spellings before counting.
errors_wares= {'Grey Ware':'Grey ware','GreyWare':'Grey ware', 'Buff Ware':'Buff ware', 'Coarse Ware': 'Coarse ware', "Green Ware":"Green ware", "Late Roman ware":"LRRW", "ARS":"ARSW", "Roman sigillata":"Terra sigillata"}
df_f= df_f.replace(errors_wares)
# Ware types sorted by total sherd count, largest first.
order = df_f.groupby('WareType')["AmountSum"].sum().sort_values(ascending=False)
ax=sns.barplot(y = 'WareType',
               x = 'AmountSum',
               data = df_f,
               order = order[order > 50].index,   # keep wares with > 50 sherds
               color = 'steelblue',
               estimator= sum,
               ci = None)
ax.set(xlabel='Number of artefacts', ylabel = '')
#plt.xticks(rotation= 45, horizontalalignment = "right")
plt.title('Distribution of wares (over 50 sherds)')
#ax.set_xscale('log')
plt.savefig('figures/8.png', dpi = 600, bbox_inches = "tight")
Without Plain ware
# Same ware distribution without plain ware (cut-off: more than 10 sherds).
df_NoPlain = df_NoPlain.replace('Red ware/Reddish Brown ware', 'Red ware')
sns.set( font_scale = 1.5)
plt.figure(figsize = (20,10))
# Ware types sorted by total sherd count, largest first.
order = df_NoPlain.groupby('WareType')["AmountSum"].sum().sort_values(ascending=False)
ax=sns.barplot(y = 'WareType',
               x = 'AmountSum',
               data = df_NoPlain,
               order = order[order > 10].index,
               color = 'steelblue',
               estimator= sum,
               ci = None)
ax.set(xlabel='Number of artefacts', ylabel = '')
plt.title('Distribution of wares without plain')
Plain, Grey, Coarse and Local wares were removed in the figure. Their numbers are so high it is difficult to see the distribution of the rest of the pottery assemblage (see the first Pottery types plot below). Their counts are included below in the table. Also, wares represented by less than 10 artefacts were removed.
X-axis: number of artefacts; Y-axis: Ware types (note that the 4 most common wares have been removed).
This is figure 10 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Split off the five most common ("plain") wares and flag every record accordingly.
common_wares = ['Plain', 'Local', 'Grey ware', 'Coarse ware', 'Red ware/Reddish Brown ware']
df_NoPlain1 = df_f[~df_f.WareType.isin(common_wares)]
# Unify the double-barrelled name after the subset has been taken.
df_f['WareType'] = df_f['WareType'].replace('Red ware/Reddish Brown ware', 'Red ware')
# Label each record as belonging to the common ("Plain") group or not.
df_f["plainChange"] = np.where(
    df_f['WareType'].isin(['Plain', 'Local', 'Grey ware', 'Coarse ware', 'Red ware']),
    'Plain', 'No Plain')
# Figure 10: proportion of Plain vs. distinctive wares per chronological phase,
# as stacked bars ordered by all_order (undated contexts appended at the end).
plt.figure()
ax = (df_f.groupby(['combined','plainChange'])["AmountSum"]
.sum() # sum counts per phase and Plain/No Plain group
.unstack() # create the phase x group data frame
.T # transpose
.apply(lambda x: x / x.sum()) # normalise each phase to proportions
.T # transpose back (there must be a better way...)
.reindex(all_order+['no dating'])
.plot(kind='bar', # plot
stacked=True,
figsize = (20,10)))
# NOTE(review): assumes the default y ticks at 0, 0.2, ..., 1.
ax.set_yticklabels([str(x) +'%' for x in range(0,101,20)])
plt.legend(bbox_to_anchor=(1.0, 1), loc=2, borderaxespad=0., ncol = 1)
plt.ylim([0,1])
plt.title('Proportions between Plain and Distinctive wares over time')
ax.set(xlabel='')
#ax.set_xticklabels(['Unspecified pottery', 'Amphora', 'Storage and food preparation', 'Coarse ware', 'Cooking ware', 'Furnishing and specialised', 'Hand Made Geometric Painted', 'Tableware'])
#plt.title("")
plt.xticks(rotation = 90)
plt.savefig('figures/10.png', dpi = 600, bbox_inches = "tight")
# The Plain vs. distinctive proportions underlying figure 10, shown as a table.
plain_props = df_f.groupby(['combined', 'plainChange'])["AmountSum"].sum().unstack()
# Normalise each phase (row) so the groups sum to 1.
plain_props = plain_props.div(plain_props.sum(axis=1), axis=0)
plain_props.reindex(all_order + ['no dating'])
This is figure 11 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 11: distribution of distinctive wares (Plain-group wares excluded,
# wares with more than 10 sherds).
sns.set(font_scale=1.5)
plt.figure(figsize=(20, 10))
distinct_totals = df_NoPlain1.groupby('WareType')["AmountSum"].sum().sort_values(ascending=False)
ax = sns.barplot(
    y='WareType',
    x='AmountSum',
    data=df_NoPlain1,
    order=distinct_totals[distinct_totals > 10].index,
    color='steelblue',
    estimator=sum,
    ci=None,
)
ax.set(xlabel='Number of artefacts', ylabel='')
plt.title('Distribution of distinctive wares')
plt.savefig("figures/11.png", dpi=600, bbox_inches="tight")
This is figure 9 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 9: change over time (parallel coordinates) of the five most common wares.
import pandas
from pandas.plotting import parallel_coordinates

sns.set(font_scale=1.5)
sns.set_palette(sns.color_palette("Set2_r"))
# One row per ware, one column per chronological phase.
common = df_f[df_f.WareType.isin(['Plain', 'Local', 'Grey ware', 'Coarse ware', 'Red ware'])]
flow_data = common.pivot_table(columns=['WareType'], index=["combined"],
                               values='AmountSum', aggfunc=sum).T.reset_index()
phase_cols = ['WareType', 'R', 'RB', 'B', 'BEI', 'EI', 'EIMI', 'MI', 'RBEI', 'BEIMI', 'RBEIMI']
flow_data = flow_data[phase_cols].fillna(0)
fig = plt.figure(figsize=(15, 10))
parallel_coordinates(flow_data, 'WareType', colormap='viridis', linewidth=3)
# Shade x positions 6-9, i.e. the MI through RBEIMI phase columns.
plt.axvspan(xmin=6, xmax=9, facecolor='b', alpha=0.3)
plt.ylabel('Number of artefacts')
plt.title('Change over time of most common ware types')
plt.legend(bbox_to_anchor=(0.0, 1), loc=2)
plt.savefig('figures/9.png', dpi=600, bbox_inches="tight")
plt.show()
# Figure 12: change over time of distinctive wares with more than 500 sherds.
ware_totals = df_NoPlain1.groupby('WareType')["AmountSum"].sum().sort_values(ascending=False)
distinctive_data = df_NoPlain1[df_NoPlain1.WareType.isin(ware_totals[ware_totals > 500].index)]
# One row per ware, one column per chronological phase.
flow_data = distinctive_data.pivot_table(columns=['WareType'], index=["combined"],
                                         values='AmountSum', aggfunc=sum).T.reset_index()
flow_data = flow_data[['WareType', 'R', 'RB', 'B', 'BEI', 'EI', 'EIMI', 'MI',
                       'RBEI', 'BEIMI', 'RBEIMI']].fillna(0)
fig = plt.figure(figsize=(15, 10))
parallel_coordinates(flow_data, 'WareType', colormap='viridis', linewidth=3)
# Shade x positions 6-9, i.e. the MI through RBEIMI phase columns.
plt.axvspan(xmin=6, xmax=9, facecolor='b', alpha=0.3)
plt.ylabel('Number of artefacts')
plt.title('Change over time of distinctive ware types')
plt.legend(bbox_to_anchor=(0.0, 1), loc=2)
plt.savefig('figures/12.png', dpi=600, bbox_inches="tight")
plt.show()
"""# Alternative visualisations
coarse_data = df_f[df_f.WareType.isin( ['Plain', 'Local', 'Grey ware', 'Coarse ware'])]
# Make the plot
fig = plt.figure(figsize=(15,10))
sns.pointplot( x = 'combined', y = "AmountSum", hue = 'WareType', estimator = sum, data = coarse_data,order = ['R', 'RB', 'B', 'BEI', 'EI', 'REI','RBEI'] )
#plt.ylab
#parallel_coordinates(flow_data, flow_data.index, colormap=plt.get_cmap("Set2"))
plt.show()
########################
coarse_data = df_f[df_f.WareType.isin(order[order > 200].index)]
# Make the plot
fig = plt.figure(figsize=(15,10))
ax = sns.pointplot( x = 'combined',
y = "AmountSum",
hue = 'WareType',
estimator = sum,
data = coarse_data,
order = ['R', 'RB', 'B', 'BEI', 'EI'],
ci = None)
ax.set(xlabel='', ylabel = 'Number of artefacts')
plt.legend(bbox_to_anchor=(0.0, 1), loc=2)
plt.show()"""
# Summary table of ware counts and their share of the whole assemblage.
wares = (df_f.groupby('WareType')['AmountSum']
         .sum()
         .sort_values(ascending=False)
         .reset_index())
wares['Percentage'] = wares['AmountSum'] / df_f.AmountSum.sum() * 100
green_cmap = sns.light_palette("green", as_cmap=True)
display(wares.style.background_gradient(cmap=green_cmap))
wares.to_excel('wares.xlsx')
# Combined percentage of the four most common wares.
wares.iloc[:4, 2].sum()
# Counts of all pottery types, plotted and tabulated with percentages.
plt.figure(figsize=(20, 15))
ax = sns.barplot(y='Type_verbose', x='AmountSum', data=df_f, estimator=sum, ci=None)
plt.xticks(rotation=90)
plt.title('Pottery Type Counts')
ax.set(xlabel='', ylabel='')
pot_types = (df_f.groupby('Type_verbose')['AmountSum']
             .sum()
             .sort_values(ascending=False)
             .reset_index())
pot_types['Percentage'] = pot_types['AmountSum'] / df_f.AmountSum.sum() * 100
green_cmap = sns.light_palette("green", as_cmap=True)
display(pot_types.style.background_gradient(cmap=green_cmap))
pot_types.to_excel('pot_types.xlsx')
# Combined share of the four most common types.
print(pot_types.iloc[:4, 2].sum())
Here the plain pottery is eliminated by removing specific types of ceramics, e.g. 'Coarse Ware', 'Table Ware, Plain', 'Cooking Ware', etc.
# Drop the records whose Type code belongs to the plain pottery categories,
# then plot the remaining types with more than 20 sherds.
plain_type_ids = ['25500', '26000', '2121000', '27000']
df_NoPlain2 = df_f[~df_f.Type.isin(plain_type_ids)]
print('Total number original: ', len(df_f),
      'AmountSum: ', df_f['AmountSum'].sum(),
      'Number without the Plain types: ', len(df_NoPlain2),
      'AmountSum: ', df_NoPlain2['AmountSum'].sum())
type_totals = df_NoPlain2.groupby(['Type_verbose'])['AmountSum'].sum().sort_values(ascending=False)
plt.figure(figsize=(20, 10))
ax = sns.barplot(x='Type_verbose',
                 y='AmountSum',
                 data=df_NoPlain2,
                 estimator=sum,
                 color='steelblue',
                 order=type_totals[type_totals > 20].index,
                 ci=None)
plt.xticks(rotation=90)
ax.set(xlabel='', ylabel='')
Also print all instances of 'unknown' for excavators to check.
# Per-type totals (notebook display). The commented-out lines below are ad-hoc
# duplicate/typo checks used while cleaning the type columns; kept for reference.
df_f.groupby('Type_verbose')['AmountSum'].sum()
#df_f[['Type_high','Type_high_verbose' ]][30:100]
#print(df_f[['Type_high', 'Type_high_verbose']].drop_duplicates())
#df_test = df_f[['Type', 'Type_verbose']].drop_duplicates()
#print(len(df_test['Type_verbose'].unique()))
#print(len(df_test['Type_verbose']))
#print(df_test.Type[df_test.Type.duplicated(keep=False)])
#print(df_test)
#df_f.loc[3707]
# A number of typos in the data are corrected here (note that the data file
# itself remains unchanged; the corrections apply to the in-memory copy only).
corr_dict = {'Pottery, Cooking Ware, , , ':'26000'}
corr_dict2 = {'2121000':'Pottery, Coarse Wares, , , '}
# Fill in codes from the verbose labels, and verbose labels from the codes.
df_f['Type'] = df_f['Type_verbose'].map(corr_dict).fillna(df_f['Type'])
df_f['Type_verbose'] = df_f['Type'].map(corr_dict2).fillna(df_f['Type_verbose'])
# One record needs a manual fix. Write through frame-level positional .iloc:
# the original chained assignment (df_f['Type'].iloc[3706] = ...) targets a
# potentially temporary Series and is not guaranteed to update df_f
# (SettingWithCopyWarning; a silent no-op under copy-on-write pandas).
df_f.iloc[3706, df_f.columns.get_loc('Type')] = '2121000'
df_f.iloc[3706, df_f.columns.get_loc('Type_verbose')] = 'Pottery, Coarse Wares, , , '
#df_f.iloc[3692]
# High-level type = first two digits of the numeric code.
df_f['Type_high'] = df_f['Type'].astype(str).str[:2]
# High-level verbose label = first two comma-separated fields of the verbose
# label, with a few variants merged into one spelling.
temp_dic = {'Pottery Unknown': 'Pottery ',
            "Pottery Coarse Ware 121": "Pottery Coarse Ware",
            "Pottery Coarse Wares": "Pottery Coarse Ware"}
df_f['Type_high_verbose'] = df_f['Type_verbose'].apply(lambda x: ''.join(x.split(',')[:2]))
df_f['Type_high_verbose_simplified'] = df_f['Type_high_verbose'].map(temp_dic).fillna(df_f['Type_high_verbose'])
# Print all instances of 'Pottery Unknown' for the excavators to check
df_f[df_f['Type_verbose'].str.contains("Unknown")]
X-axis: Pottery types (High level division) ; Y-axis: number of artefacts.
This is figure 13 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 13: total counts of the high-level functional pottery types,
# sorted in descending order.
sns.set( font_scale = 1.3)
sns.set_palette("Spectral", 8)
fig = plt.figure(figsize =(12, 8))
ax = sns.barplot(x = 'Type_high_verbose_simplified' ,
y = 'AmountSum',
estimator = sum,
order = df_f.groupby('Type_high_verbose_simplified')['AmountSum'].sum().sort_values(ascending=False).index,
data = df_f,
color = 'steelblue',
ci = None)
ax.set(xlabel='', ylabel = 'Number of artefacts')
# we manually assign labels because the ones in the data are a bit long, comment out the line below to see
# NOTE(review): these replacement labels are positional - they assume the
# descending-count order produced above; re-check after any data update.
ax.set_xticklabels(['Tableware', 'Coarse ware', 'Cooking ware', 'Storage and food preparation', 'Furnishing and specialised', 'Amphora','Unspecified pottery', 'Hand Made Geometric Painted'])
plt.xticks(rotation = 90)
plt.title('Distribution of functional types of pottery')
#ax.set_yscale('log')
plt.savefig('figures/13.png', dpi = 600, bbox_inches = "tight")
# Print the totals so the manual labels above can be cross-checked.
print(df_f.groupby("Type_high_verbose_simplified")['AmountSum'].sum().sort_values(ascending=False))
#df_f.groupby(["Type_high_verbose_simplified"])['AmountSum'].sum().sort_values(ascending=False).plot(kind='bar')
# Counts and percentages of the high-level functional types.
pot_types = (df_f.groupby('Type_high_verbose_simplified')['AmountSum']
             .sum()
             .sort_values(ascending=False)
             .reset_index())
pot_types['Percentage'] = pot_types['AmountSum'] / df_f.AmountSum.sum() * 100
green_cmap = sns.light_palette("green", as_cmap=True)
display(pot_types.style.background_gradient(cmap=green_cmap))
pot_types.to_excel('pot_types_hig.xlsx')
# Diagnostic plots: ware breakdown within three functional categories.
# ci=None on every plot - bootstrap intervals are meaningless for summed counts
# (the original enabled them only on the first plot, clearly an oversight).
# Distinct variable names per subset: the original reused `tableWare` for the
# coarse- and cooking-ware subsets, which was misleading.
plt.figure(figsize = (20,20))
table_subset = df_NoPlain[df_NoPlain['Type_verbose'].str.contains('Pottery, Table Ware')]
sns.barplot(x = 'WareType', y = 'AmountSum', estimator = sum, data = table_subset, ci = None)
plt.xticks(rotation = 90)
plt.figure(figsize = (20,20))
coarse_subset = df_NoPlain[df_NoPlain['Type_verbose'].str.contains('Coarse Ware')]
sns.barplot(x = 'WareType', y = 'AmountSum', estimator = sum, data = coarse_subset, ci = None)
plt.xticks(rotation = 90)
plt.figure(figsize = (20,20))
cooking_subset = df_NoPlain[df_NoPlain['Type_verbose'].str.contains('Cooking Ware')]
sns.barplot(x = 'WareType', y = 'AmountSum', estimator = sum, data = cooking_subset, ci = None)
plt.xticks(rotation = 90)
Note that the y-axis is truncated
This is figure 8 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 15: ware x functional-type breakdown for wares with > 500 sherds.
#tableWare = df_NoPlain[df_NoPlain['Type_verbose'].str.contains('Cooking Ware')]
order = df_f.groupby('WareType')["AmountSum"].sum().sort_values(ascending=False)
ware_data = df_f[df_f['WareType'].isin(order[order > 500].index)]
plt.figure(figsize = (15,10))
ax= sns.barplot(x = 'WareType', y = 'AmountSum', hue = 'Type_high_verbose_simplified', estimator = sum, ci = None, order = order[order > 500].index, data = ware_data)
ax.legend(title = '', loc = 1)
ax.set_ylabel('Number of artefacts')
ax.set_xlabel('')
# The y-axis is truncated at 100000: the Plain bar overflows, so its total is
# written as a text annotation instead.
plt.ylim(None, 100000)
# NOTE(review): the 465333 total is hard-coded - re-check it if the data change.
plt.text(-0.25, 82000, 'Plain: 465333', rotation = 90, fontsize=12)
#ax.set_xscale("log")
plt.savefig("figures/15.png", dpi = 600, bbox_inches = "tight")
# Underlying table (notebook display).
df_f.groupby(['WareType', 'Type_high_verbose_simplified'])['AmountSum'].sum()
X-axis: Number of artefacts divided by chronology (note the log scale); Y-axis: Type of pottery.
# Pottery types split by the 'Pottery' (dating) category; log x-scale because
# the counts span several orders of magnitude.
# NOTE(review): hue 'Pottery' presumably encodes the pottery-based dating of
# the context (cf. the 'Chronology of contexts using Pottery dating' cell) -
# confirm against the data dictionary.
sns.set(palette='Spectral', font_scale=1.2)
plt.figure(figsize=(20, 20))
ax = sns.barplot(data=df_f,
                 y='Type_verbose',
                 x='AmountSum',
                 hue='Pottery',
                 estimator=sum,
                 ci=None)
ax.set(xlabel='', ylabel='')
ax.set_xscale("log")
plt.legend(bbox_to_anchor=(1.0, 1), loc=2)
plt.title('Pottery types distribution without plain wares')
# Change over time of the high-level functional types (> 500 sherds overall).
type_totals = df_f.groupby('Type_high_verbose_simplified')["AmountSum"].sum().sort_values(ascending=False)
frequent = df_f[df_f['Type_high_verbose_simplified'].isin(type_totals[type_totals > 500].index)]
# One row per functional type, one column per chronological phase.
flow_data = frequent.pivot_table(columns=['Type_high_verbose_simplified'],
                                 index=["combined"],
                                 values='AmountSum',
                                 aggfunc=sum).T.reset_index()
flow_data = flow_data[['Type_high_verbose_simplified', 'R', 'RB', 'B', 'BEI', 'EI',
                       'EIMI', 'MI', 'RBEI', 'BEIMI', 'RBEIMI']].fillna(0)
fig = plt.figure(figsize=(15, 10))
parallel_coordinates(flow_data, 'Type_high_verbose_simplified', colormap='viridis', linewidth=3)
# Shade x positions 6-9, i.e. the MI through RBEIMI phase columns.
plt.axvspan(xmin=6, xmax=9, facecolor='b', alpha=0.3)
plt.ylabel('Number of artefacts')
plt.title('Change over time of pottery types')
plt.legend(bbox_to_anchor=(0.0, 1), loc=2)
plt.show()
This is figure 14 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 14: change over time of the distinctive pottery types (> 200 sherds),
# with the verbose labels shortened for the legend.
import re  # NOTE(review): appears unused in this cell - presumably a leftover; verify before removing
order = df_NoPlain2.groupby('Type_verbose')["AmountSum"].sum().sort_values(ascending=False)
distinctive_data = df_NoPlain2[df_NoPlain2['Type_verbose'].isin(order[order > 200].index)]
# One row per type, one column per chronological phase.
flow_data = distinctive_data.pivot_table(columns = ['Type_verbose'],
index = ["combined"],
values = 'AmountSum',
aggfunc = sum).T.reset_index()
flow_data = flow_data[['Type_verbose', 'R', 'RB', 'B', 'BEI', 'EI', 'EIMI','MI', 'RBEI', 'BEIMI', 'RBEIMI']].fillna(0)
# Shorter legend labels. The keys must match the data verbatim (including the
# 'Food Perpetration' spelling, which presumably mirrors the source data) -
# do not "fix" them here.
change_dic = {'Pottery, Amphora, , , ': 'Amphora',
'Pottery, Coarse Ware Storage Jars / Food Perpetration, Closed, Pithos / Dolium, ':"Pithos / Dolium",
'Pottery, Cooking Ware, Closed, Cooking Pot, ': "Closed Cooking Pot",
'Pottery, Cooking Ware, Open, Casserole, ':"Open Casserole",
'Pottery, Domestic furnishing & Specialised Vessel, Specialised Vessel, Lamp, ':"Lamp",
'Pottery, Table Ware, , , ':"Table Ware",
'Pottery, Table Ware, Fine Ware, , ':'Fine Ware',
'Pottery, Table Ware, Fine Ware, Open, Bowl':'Bowl'}
flow_data.Type_verbose =flow_data.Type_verbose.replace(change_dic)
# Make the plot
fig = plt.figure(figsize=(15,10))
parallel_coordinates(flow_data, 'Type_verbose', colormap = 'viridis', linewidth = 3)
#parallel_coordinates(flow_data, flow_data.index, colormap=plt.get_cmap("Set2"))
# Shade x positions 6-9, i.e. the MI through RBEIMI phase columns.
plt.axvspan(xmin = 6, xmax = 9, facecolor='b', alpha=0.3)
plt.ylabel('Number of artefacts')
plt.title('Change over time of distinctive pottery types')
plt.legend(bbox_to_anchor=(0.0, 1), loc=2)
plt.savefig('figures/14.png', dpi = 600, bbox_inches = "tight")
plt.show()
X-axis: Pottery types (High level division) ; Y-axis: proportion of artefacts.
This is figure 3 in Romanowska et al. 2021. A study of the centuries-long reliance on local ceramics in Jerash through full quantification and simulation.
# Figure 3 (second paper): proportion of local vs. non-local pottery within
# each high-level functional type, as stacked bars.
sns.set( font_scale = 1.5)
sns.set_palette(pal)  # `pal` comes from an earlier cell - TODO confirm it is in scope here
# the pottery designation is taken from the top two levels and we merged 'Pottery' and 'Pottery Unknown', thus -"simplified"
ax = (df_f.groupby(['Type_high_verbose_simplified','Local'])["AmountSum"]
.sum() # sum counts per functional type and Local flag
.unstack() # create the type x Local data frame
.T # transpose
.apply(lambda x: x / x.sum()) # normalise each type to proportions
.T # transpose back (there must be a better way...)
.plot(kind='bar', # plot
stacked=True,
figsize = (20,10)))
plt.legend(bbox_to_anchor=(1.0, 1), loc=2, borderaxespad=0., ncol = 1)
plt.ylim([0,1])
ax.set(xlabel='')
# NOTE(review): positional labels - they assume the default (alphabetical)
# groupby order of 'Type_high_verbose_simplified'; re-check after data updates.
ax.set_xticklabels(['Unspecified pottery', 'Amphora', 'Storage and food preparation', 'Coarse ware', 'Cooking ware', 'Furnishing and specialised', 'Hand Made Geometric Painted', 'Tableware'])
#plt.title("")
plt.xticks(rotation = 90)
#plt.savefig('CiC_Local_Pot_type_loc_sim.png', dpi = 600, bbox_inches = "tight")
# df_f.groupby(['Type_high_verbose_simplified','Local'])["AmountSum"].sum()
Note: this is virtually the same with or without the plain wares; only the scale on the y axis is different - it's in tens of thousands not hundreds of thousands.
X-axis: Trench; Y-axis: Number of artefacts within dated contexts.
# Absolute counts of dated pottery per trench, stacked by dating category.
from matplotlib.colors import ListedColormap

sns.set(font_scale=1.8)
# chrono_cmap is reused by the following cells.
chrono_cmap = ListedColormap(pal_chrono)
counts_by_trench = df_f.groupby(['Trench', 'Pottery'])["AmountSum"].sum().unstack()
evid = counts_by_trench.plot(kind='bar',
                             stacked=True,
                             colormap=chrono_cmap,
                             figsize=(20, 10))
plt.legend(bbox_to_anchor=(1.0, 1), loc=2, borderaxespad=0., ncol=2)
plt.title('Dating by trench')
X-axis: Trench; Y-axis: Proportion of artefacts within dated contexts.
# Proportions (rather than counts) of dating categories per trench.
sns.set(font_scale=1.5)
trench_counts = df_f.groupby(['Trench', 'Pottery'])["AmountSum"].sum().unstack()
# Normalise each trench (row) so its dating categories sum to 1.
trench_props = trench_counts.div(trench_counts.sum(axis=1), axis=0)
freqs_evid = trench_props.plot(kind='bar',
                               stacked=True,
                               colormap=chrono_cmap,
                               figsize=(20, 10))
plt.legend(bbox_to_anchor=(1.0, 1), loc=2, borderaxespad=0., ncol=1)
plt.title('Chronology of contexts using Pottery dating')
plt.ylim([0, 1])
This is figure 7 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 7: chronological composition of each trench as stacked proportions.
pal_chrono_cmap = ListedColormap(pal_chrono_all[:-3])
# NOTE(review): `order` is defined but never used below (all_order is used
# instead) - presumably a leftover; verify before removing.
order = [ 'R', 'RB', 'B', 'BEI', 'EI', 'REI','RBEI', 'no dating']
test = df_f.pivot_table(columns = 'Trench', index = 'combined', values = 'AmountSum', aggfunc = sum).fillna(0)#
# Empty 'combined' values are relabelled 'no dating'; trenches Qbd, Qg, Qh and H are dropped.
test = test.rename(index={'':'no dating'}).reindex(all_order[:-3]).drop(["Qbd", "Qg", "Qh",'H'], axis = 1)
# Normalise each trench (column) to proportions, then plot trenches on the x-axis.
ax = test.iloc[:,:].apply(lambda x: x / x.sum()).T.plot(
kind='bar',
colormap = pal_chrono_cmap,
figsize = (20,10),# plot
stacked=True)
# NOTE(review): assumes the default y ticks at 0, 0.2, ..., 1.
ax.set_yticklabels([str(x) +'%' for x in range(0,101,20)])
plt.legend(bbox_to_anchor=(1.0, 1), loc=2, borderaxespad=0., ncol = 1)
plt.title('Dating by trench')
plt.ylim([0,1])
plt.savefig('figures/7.png', dpi = 600, bbox_inches = "tight")
# Preservation: counts and percentages of the different sherd parts.
plt.figure(figsize=(15, 10))
part_counts = df_clean.groupby('PartOfObject')['AmountSum'].sum()
part_counts.plot(kind='barh')
part_pct = part_counts.apply(lambda x: x / df_clean.AmountSum.sum() * 100)
# Merge counts and percentages; the _x/_y suffixes come from the shared
# 'AmountSum' column name.
PoO = pd.merge(part_counts.to_frame(), part_pct.to_frame(),
               left_index=True, right_index=True).reset_index()
PoO = PoO.rename(columns={'AmountSum_x': 'Number of Artefacts',
                          'AmountSum_y': 'Percentage'})
PoO
This is figure 16 in Romanowska et al. 2021. Trends in Ceramic Assemblages from the Northwest Quarter of Gerasa/Jerash, Jordan.
# Figure 16: heatmap of object-part frequencies per ware, on a log colour scale.
from matplotlib.colors import LogNorm
import math
sns.set(font_scale=1.1)
# Data preparation: wares as rows, parts of the object as columns.
ct_preservation = pd.pivot_table(data=df_clean, values='AmountSum',
                                 index="PartOfObject", columns="WareType",
                                 aggfunc=sum).T
# change 'WareType' to SG2, SG3 to see the distribution for functional types
# Remove rows/columns whose mean count is 10 or less.
big_r = ct_preservation.apply(np.mean, axis=1) > 10
big_c = ct_preservation.apply(np.mean, axis=0) > 10
b_ct_pre = ct_preservation[big_r]
b_ct_pre = b_ct_pre.loc[:, big_c]
# Reorder the data so that the visualisation is easier to read.
b_ct_pre = b_ct_pre[['Body', 'Rim', 'Handle', 'Base', 'Lid', 'Profile', 'Spout']]
b_ct_pre = b_ct_pre.sort_values(by='Body', ascending=False)
# plotting
plt.figure(figsize=(15, 7))
# The difference between body sherds and the rest is so large that a linear
# colour scale is meaningless, so colours are set on a log scale. Because the
# minimum of AmountSum is zero, 1 is added when scaling (this makes virtually
# no difference in the shades). BUGFIX: the +1 is now applied to vmin itself -
# the original passed the raw minimum to LogNorm, which is invalid when it is
# 0 (log scales require strictly positive limits).
log_norm = LogNorm(vmin=df_clean.AmountSum.min() + 1, vmax=df_clean.AmountSum.max())
cbar_ticks = [math.pow(10, i)
              for i in range(math.floor(math.log10(df_clean.AmountSum.min() + 1)),
                             1 + math.ceil(math.log10(df_clean.AmountSum.max())))]
sns.heatmap(b_ct_pre, cmap='Blues', norm=log_norm, cbar_kws={"ticks": cbar_ticks})
plt.xlabel("")
plt.ylabel("")
plt.title("Frequencies of different object parts per ware")
plt.savefig('figures/16.png', dpi=600, bbox_inches="tight")
FIN